library(cytomapper)
library(dplyr)
library(ggplot2)
library(simpleSeg)
library(FuseSOM)
library(ggpubr)
library(scater)
library(spicyR)
#library(ClassifyR)
library(ClassifyR, lib.loc = "~/localPackages/")
library(scFeatures)
library(lisaClust)


#withr::with_libpaths(new = "~/localPackages/", devtools::install_github("SydneyBioX/ClassifyR"))

Global paramaters

nCores <- 20
BPPARAM <- simpleSeg:::generateBPParam(nCores)

Read in images

Our images are stored in the images folder within the Data folder. Here we use readImages() from the EBImage package to read these into R. If memory is a restricting factor, and the files are in a slightly different format, you could use loadImages() from the cytomapper package to load all of the tiff images into a CytoImageList object, which can store the images as h5 on-disk.

pathToImages <- "Data/images"

# Get directories of images
imageDirs <- dir(pathToImages, full.names = TRUE)
names(imageDirs) <- dir(pathToImages, full.names = FALSE)

# Get files in each directory
files <- sapply(imageDirs, list.files, pattern = "tif", full.names = TRUE, simplify = FALSE)

# Read files with readImage from EBImage
images <- lapply(files, EBImage::readImage, as.is = TRUE)

# We've found it useful to sqrt transform our images
images <- lapply(images, sqrt)

We will make use of the on_disk option to convert our images to a CytoImageList with the images not held in memory.

# Store images in a CytoImageList with images on_disk as h5 files to save memory. 
dir.create("Data/h5Files")
images <- CytoImageList(images, on_disk = TRUE, h5FilesPath = "Data/h5Files", BPPARAM = BPPARAM)
mcols(images) <- S4Vectors::DataFrame(imageID = names(images))
gc()
##            used  (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells 15461352 825.8   27433958  1465.2   20973335  1120.1
## Vcells 25292943 193.0 6813011224 51979.2 8516264030 64974.0
# images <- cytomapper::loadImages("Data/h5Files/", pattern = "h5", h5FilesPath = "Data/h5Files/",on_disk = TRUE)
# mcols(images) <- S4Vectors::DataFrame(imageID = names(images))

Segment the images

Run simpleSeg

masks <- simpleSeg(images,
                   nucleus = c("PCA", "HH3"),
                   cellBody = "dilate",
                   sizeSelection = 40,
                   discSize = 5,
                   cores = nCores)

Visualise separation

display(colorLabels(masks[[1]]))

Visualise outlines

plotPixels(image = images[1:3], 
           mask = masks[1:3], 
           img_id = "imageID", 
           colour_by = c("PanKRT", "GLUT1", "HH3", "CD3", "CD20"), 
           display = "single",
           colour = list(HH3 = c("black","blue"), 
                         CD3 = c("black","purple"),
                         CD20 = c("black","green"),
                         GLUT1 = c("black", "red"),
                         PanKRT = c("black", "yellow")),
           legend = NULL)

Summarise cell features.

cells <- cytomapper::measureObjects(masks, 
                                    images, 
                                    img_id = "imageID", 
                                    BPPARAM = BPPARAM)

Load the clinical data

Read the clinical data

clinical <- read.csv("Data/1-s2.0-S0092867421014860-mmc1.csv")
clinical <- clinical |>
  mutate(imageID = paste0("Point", PointNumber, "_pt", Patient_ID, "_", TMAD_Patient))
clinical$imageID[grep("normal", clinical$Tissue_Type)] <- paste0(clinical$imageID[grep("normal", clinical$Tissue_Type)], "_Normal")

clinicalVariables <- c("imageID", "Patient_ID","Status", "Age", "SUBTYPE", "PAM50", "Treatment", "DCIS_grade", "Necrosis")
rownames(clinical) <- clinical$imageID
clinical <- clinical[names(images), ]

Put the clinical data into the colData of SingleCellExperiment

colData(cells) <- cbind(colData(cells), clinical[cells$imageID, clinicalVariables])

Normalize data

  df <- as.data.frame(cbind(colData(cells), t(assay(cells, "counts"))))
  ggplot(df, aes(x = (PanKRT), colour = imageID)) + geom_density() + theme(legend.position = "none")

#cells <- normalizeCells(cells, transformation = "sqrt", assayIn = "counts")
#source("scMerge2.R")
cells <- normalizeCells(cells, transformation = c("sqrt"), method = c("quant99", "minMax"), assayIn = "counts", cores = nCores)



df <- as.data.frame(cbind(colData(cells), t(assay(cells, "norm"))))
 ggplot(df, aes(x = (PanKRT), colour = imageID)) + geom_density() + theme(legend.position = "none")

Cluster cells using FuseSOM package

Perform the clustering

cells@metadata <- list()
#The markers used in the original publication to gate cell types
useMarkers <- c("PanKRT", "ECAD", "CD45", "CK7", "VIM", "FAP", "CK5", "CD36",
                "CD31", "CD20", "CD4", "CD3", "CD8", "CD14", "CD11c", "CD68",
                "HLADRDPDQ", "SMA")

set.seed(51773)
cells <- runFuseSOM(cells, markers = useMarkers, assay = 'norm', numClusters = 25)

Check how many clusters should be used.

# As I've already run runFuseSOM I don't need to run generateSOM()
cells <- estimateNumCluster(cells, kseq = 2:30)
## 

========
================
========================
================================
========================================
================================================
========================================================
================================================================
========================================================================
================================================================================
optiPlot(cells, method = "gap")

plotGroupedHeatmap(cells, 
                   features = useMarkers, 
                   group = "clusters", 
                   exprs_values = "norm", 
                   center = TRUE, 
                   scale = TRUE, 
                   zlim = c(-2,2))

sort(table(colData(cells)$clusters))
## 
##  cluster_5 cluster_23 cluster_14  cluster_7  cluster_8 cluster_12  cluster_3 
##        129        338        358        364        377        439        490 
##  cluster_4 cluster_21  cluster_9 cluster_25 cluster_10  cluster_2 cluster_11 
##        539        553        571        787        967       1261       1398 
## cluster_19 cluster_16 cluster_18 cluster_17  cluster_6 cluster_15 cluster_24 
##       1417       1574       1653       1859       2051       2277       2559 
## cluster_22 cluster_20 cluster_13  cluster_1 
##       3200       4027       4991      28351
imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]
cellsToUse <- cells$Status%in%c("nonprogressor", "progressor")

testProp <- colTest(cells[, cellsToUse], 
                    condition = "Status", 
                    feature = "clusters",
                    type = "ttest")

testProp
##            mean in group nonprogressor mean in group progressor      t   pval
## cluster_22                      0.0650                  0.03700  2.800 0.0074
## cluster_17                      0.0380                  0.02600  2.100 0.0450
## cluster_19                      0.0270                  0.01400  2.000 0.0460
## cluster_23                      0.0033                  0.01100 -1.500 0.1600
## cluster_24                      0.0510                  0.03400  1.300 0.2200
## cluster_8                       0.0031                  0.00820 -1.200 0.2400
## cluster_1                       0.4500                  0.49000 -1.100 0.2700
## cluster_21                      0.0072                  0.00420  1.000 0.3100
## cluster_5                       0.0021                  0.00087  1.000 0.3100
## cluster_18                      0.0290                  0.02300  0.990 0.3300
## cluster_11                      0.0240                  0.02000  0.900 0.3800
## cluster_9                       0.0096                  0.01300 -0.880 0.3900
## cluster_16                      0.0200                  0.02600 -0.660 0.5200
## cluster_14                      0.0055                  0.00730 -0.590 0.5600
## cluster_15                      0.0340                  0.03000  0.530 0.6000
## cluster_20                      0.0580                  0.07200 -0.520 0.6100
## cluster_3                       0.0037                  0.00460 -0.460 0.6500
## cluster_25                      0.0110                  0.01300 -0.410 0.6800
## cluster_6                       0.0390                  0.03400  0.420 0.6800
## cluster_7                       0.0082                  0.00610  0.410 0.6800
## cluster_12                      0.0072                  0.01100 -0.410 0.6900
## cluster_2                       0.0160                  0.01800 -0.330 0.7500
## cluster_10                      0.0120                  0.01300 -0.310 0.7600
## cluster_13                      0.0710                  0.07200 -0.086 0.9300
## cluster_4                       0.0084                  0.00860 -0.087 0.9300
##            adjPval    cluster
## cluster_22    0.18 cluster_22
## cluster_17    0.38 cluster_17
## cluster_19    0.38 cluster_19
## cluster_23    0.81 cluster_23
## cluster_24    0.81 cluster_24
## cluster_8     0.81  cluster_8
## cluster_1     0.81  cluster_1
## cluster_21    0.81 cluster_21
## cluster_5     0.81  cluster_5
## cluster_18    0.81 cluster_18
## cluster_11    0.81 cluster_11
## cluster_9     0.81  cluster_9
## cluster_16    0.82 cluster_16
## cluster_14    0.82 cluster_14
## cluster_15    0.82 cluster_15
## cluster_20    0.82 cluster_20
## cluster_3     0.82  cluster_3
## cluster_25    0.82 cluster_25
## cluster_6     0.82  cluster_6
## cluster_7     0.82  cluster_7
## cluster_12    0.82 cluster_12
## cluster_2     0.83  cluster_2
## cluster_10    0.83 cluster_10
## cluster_13    0.93 cluster_13
## cluster_4     0.93  cluster_4
prop <- getProp(cells, feature = "clusters")
clusterToUse <- rownames(testProp)[1]

boxplot( prop[imagesToUse, clusterToUse] ~ clinical[imagesToUse, "Status"] )

set.seed(51773)
cells <- runUMAP(cells, subset_row = useMarkers, exprs_values = "norm")
plotReducedDim(cells[,colData(cells)$imageID %in% unique(colData(cells)$imageID)[c(c(1,20,40,50,60),c(1,20,40,50,60)+2)]], dimred="UMAP", colour_by="imageID")

plotReducedDim(cells[,colData(cells)$imageID %in% unique(colData(cells)$imageID)[c(c(1,20,40,50,60),c(1,20,40,50,60)+2)]], dimred="UMAP", colour_by="clusters")

spicyR: test spatial relationships

spicyTest <- spicy(cells[, cellsToUse], 
                   condition = "Status", 
                   cellType = "clusters",
                   imageID = "imageID",
                   spatialCoords = c("m.cx", "m.cy"),
                   Rs = c(20, 50, 100),
                   sigma = 50,
                   BPPARAM = BPPARAM)
topPairs(spicyTest, n = 10)
##                        intercept coefficient      p.value adj.pvalue       from
## cluster_25__cluster_1  -116.2825    63.12200 0.0007060349  0.1542898 cluster_25
## cluster_1__cluster_25  -113.7198    58.28772 0.0008580480  0.1542898  cluster_1
## cluster_25__cluster_13 -107.1412    86.90733 0.0010431800  0.1542898 cluster_25
## cluster_24__cluster_7  -160.8764    69.21451 0.0012028068  0.1542898 cluster_24
## cluster_13__cluster_25 -103.9409    84.83352 0.0012382808  0.1542898 cluster_13
## cluster_3__cluster_16  -161.8938    70.53442 0.0018794192  0.1951464  cluster_3
## cluster_16__cluster_3  -162.0196    61.66160 0.0030316699  0.2596910 cluster_16
## cluster_7__cluster_24  -159.4935    84.31748 0.0033347161  0.2596910  cluster_7
## cluster_5__cluster_14  -125.0648   503.79472 0.0043583270  0.3016931  cluster_5
## cluster_4__cluster_14  -127.3751   105.47747 0.0085322807  0.5315611  cluster_4
##                                to
## cluster_25__cluster_1   cluster_1
## cluster_1__cluster_25  cluster_25
## cluster_25__cluster_13 cluster_13
## cluster_24__cluster_7   cluster_7
## cluster_13__cluster_25 cluster_25
## cluster_3__cluster_16  cluster_16
## cluster_16__cluster_3   cluster_3
## cluster_7__cluster_24  cluster_24
## cluster_5__cluster_14  cluster_14
## cluster_4__cluster_14  cluster_14
signifPlot(spicyTest)

lisaClust: Find cellular neighbourhoods

cells <- lisaClust(cells, k = 10, Rs = c(20,50,100), sigma = 50,
                   spatialCoords = c("m.cx", "m.cy"), cellType = "clusters", BPPARAM = BPPARAM)
hatchingPlot(cells,
             useImages = "Point2206_pt1116_31620",
             cellType = "clusters",
             spatialCoords = c("m.cx", "m.cy"),
             window = "square",
             nbp = 200
            )

imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]

prop <- getProp(cells, feature = "region")
testRegion <- colTest(prop[imagesToUse,], condition = clinical[imagesToUse, "Status"])

testRegion
##             W   pval adjPval   cluster
## region_6  150 0.0024   0.024  region_6
## region_4  450 0.0085   0.042  region_4
## region_7  240 0.2300   0.640  region_7
## region_9  370 0.2900   0.640  region_9
## region_10 250 0.3200   0.640 region_10
## region_8  340 0.5600   0.870  region_8
## region_2  280 0.6400   0.870  region_2
## region_1  320 0.7800   0.870  region_1
## region_5  290 0.7800   0.870  region_5
## region_3  300 0.8900   0.890  region_3
boxplot(prop[imagesToUse,rownames(testRegion)[1]]~clinical[imagesToUse, "Status"])

regionMap(cells, cellType = "clusters", limit = c(0.2,5))

scFeatures: Test some different features

data <- scFeatures(cells, 
                   feature_types = c("proportion_raw", "gene_mean_celltype"),
                   sample = "imageID",
                   celltype = "clusters",
                   assay = "norm",
                   ncores = nCores )
## [1] "generating proportion raw features"
## [1] "generating gene mean celltype features"
imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]

test <- colTest(data$feature_gene_mean_celltype[imagesToUse,],
        condition = clinical[imagesToUse, "Status"])

test |> head()
##                     W    pval adjPval           cluster
## cluster_17--CD8   180 0.00020    0.20   cluster_17--CD8
## cluster_17--PDL1  170 0.00057    0.29  cluster_17--PDL1
## cluster_23--GLUT1 140 0.00110    0.37 cluster_23--GLUT1
## cluster_16--CD11c 240 0.00190    0.48 cluster_16--CD11c
## cluster_25--HER2  210 0.00240    0.48  cluster_25--HER2
## cluster_23--pS6   160 0.00390    0.49   cluster_23--pS6

ClassifyR: Classification

imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]


data[["regions"]] <- getProp(cells, "region")
data <- lapply(data, as.data.frame)

#Subset data
measurements <- lapply(data, function(x)x[imagesToUse, ])
names(measurements) <- c("prop", "mean", "region")

set.seed(51773)
cv <- crossValidate(measurements = measurements, 
                    outcome = clinical[imagesToUse, "Status"],
                    nCores = nCores,
                    nFeatures = 20,
                    classifier = "randomForest")

ROCplot(cv, comparison = "Assay Name")